{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "# Language Models\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "attributes": {
     "classes": [],
     "id": "",
     "n": "3"
    },
    "origin_pos": 3,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from d2l import torch as d2l"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Read minibatches of input sequences and target sequences at random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "attributes": {
     "classes": [],
     "id": "",
     "n": "7"
    },
    "origin_pos": 10,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X: tensor([[16, 19,  6,  0, 21,  9,  2, 15,  0,  2],\n",
      "        [ 2, 19,  8,  6,  0, 21,  9, 10, 15,  8]]) \n",
      "Y: tensor([[19,  6,  0, 21,  9,  2, 15,  0,  2,  0],\n",
      "        [19,  8,  6,  0, 21,  9, 10, 15,  8,  0]])\n"
     ]
    }
   ],
   "source": [
    "@d2l.add_to_class(d2l.TimeMachine)  \n",
    "def __init__(self, batch_size, num_steps, num_train=10000, num_val=5000):\n",
    "    super(d2l.TimeMachine, self).__init__()\n",
    "    self.save_hyperparameters()\n",
    "    corpus, self.vocab = self.build(self._download())\n",
    "    array = torch.tensor([corpus[i:i+num_steps+1]\n",
    "                        for i in range(0, len(corpus)-num_steps-1)])\n",
    "    self.X, self.Y = array[:,:-1], array[:,1:]\n",
    "\n",
    "@d2l.add_to_class(d2l.TimeMachine)  \n",
    "def get_dataloader(self, train):\n",
    "    idx = slice(0, self.num_train) if train else slice(\n",
    "        self.num_train, self.num_train + self.num_val)\n",
    "    return self.get_tensorloader([self.X, self.Y], train, idx)\n",
    "\n",
    "data = d2l.TimeMachine(batch_size=2, num_steps=10)\n",
    "for X, Y in data.train_dataloader():\n",
    "    print('X:', X, '\\nY:', Y)\n",
    "    break"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "language_info": {
   "name": "python"
  },
  "rise": {
   "autolaunch": true,
   "enable_chalkboard": true,
   "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
   "scroll": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}